# If running scripts one after the other, then should remove some elements because names are reused
# rm(list=ls())
# Set the character-type locale before any text is read or compared.
Sys.setlocale("LC_CTYPE", "EN_US.UTF-8")
# NOTE(review): machine-specific path; every relative file name below
# (the .dta input, the PDFs, the .Rda output) resolves against this folder.
setwd("~/Dropbox/Dissertation/Ideological_Labels/Replication")
library(foreign)  # read.dta()
library(broom)    # tidy() on the cor.test() results
library(scales)
# NOTE(review): ggplot2 is not attached in this header although ggplot() is
# used further down in the script.
# Large scipen penalty so numbers are printed in fixed rather than scientific notation.
options(scipen = 999)



## Read in Data
# The Stata file is loaded into `data`; all recoded variables below are added
# to this data frame as new columns.
data <- read.dta("2008_Political_Culture_Survey_Data.dta")


################################################################################################
############################ Recoding  Variables  ##############################################
################################################################################################

## Ideological Self-Identification

# Question wording: "On the scale below, 0 stands for the left and 10 for the
# right. Where would you place your own leaning?"
# own.ideology is a copy of h7 in which every value from 88 through 99 is set
# to NA (presumably the nonresponse codes -- confirm against the codebook).

data$own.ideology <- replace(data$h7, data$h7 %in% 88:99, NA)


# B12a-B12f: six statements about attachment to China, all answered on the
# same five-point agree/disagree scale. Each response label is looked up in
# b12.scores: 5 = agree strongly ... 1 = disagree strongly. "don't know",
# "no answer", and any label that is not listed come out as NA.

b12.scores <- c(
  "agree strongly" = 5,
  "agree" = 4,
  "neither agree nor disagree" = 3,
  "disagree" = 2,
  "disagree strongly" = 1,
  "don't know" = NA,
  "no answer" = NA
)

# B12a. "Even if I could choose any country in the world, I would still
# prefer to be a citizen of China."
data$prefer.chinese.citizenship <- unname(b12.scores[as.character(data$b12a)])

# B12b. "There are some things about China today that make me feel ashamed."
data$china.shameful <- unname(b12.scores[as.character(data$b12b)])

# B12c. "The world would be a better place if foreigners were more like the
# Chinese."
data$chinese.paragon <- unname(b12.scores[as.character(data$b12c)])

# B12d. "Generally speaking, China is better than most other countries."
data$china.best <- unname(b12.scores[as.character(data$b12d)])

# B12e. "Everyone should support their own government, even when it is in
# the wrong."
data$support.wrong.gov <- unname(b12.scores[as.character(data$b12e)])

# B12f. "When our country's athletes do well, I am proud to be Chinese."
data$shared.athletic.glory <- unname(b12.scores[as.character(data$b12f)])

# C2b and D31a-D31g share one five-point agree/disagree scale whose
# nonresponse labels are spelled "dont know" / "no answer". Scores run from
# 5 = agree strongly to 1 = disagree strongly; nonresponse and any unlisted
# label become NA. C3a, C3b and C4 each have their own response set below.

d31.scores <- c(
  "agree strongly" = 5,
  "agree" = 4,
  "neither agree nor disagree" = 3,
  "disagree" = 2,
  "disagree strongly" = 1,
  "dont know" = NA,
  "no answer" = NA
)

# C2b. "The government should not interfere in individuals' private lives."
data$gov.noninterference <- unname(d31.scores[as.character(data$c2b)])

# C3a. "People hold different views about the state of China today. Which of
# the following statements is closest to your view?"
# Coded 1 = the state is too strong, 0 = the state is too weak.
c3a.scores <- c(
  "The state is too strong." = 1,
  "The state is too weak." = 0,
  "dont know" = NA,
  "refuse to answer" = NA
)
data$overweening.state <- unname(c3a.scores[as.character(data$c3a)])

# C3b. "Which of the following statements is closest to your view?"
# (questionnaire codes: 1 = too much freedom, 5 = too little freedom).
# Coded 1 = too much freedom, 0 = not enough freedom.
c3b.scores <- c(
  "There is too much freedom" = 1,
  "There is not enough freedom." = 0,
  "dont know" = NA,
  "refuse to answer" = NA
)
data$freedom.surfeit <- unname(c3b.scores[as.character(data$c3b)])

# C4. "Some people think there is a conflict between economic development and
# environmental protection. Which do you think is more important, economic
# growth or environmental protection?"
# Coded 4 = development much more important ... 1 = environment much more
# important.
c4.scores <- c(
  "economic development is much more important" = 4,
  "economic development is somewhat more important" = 3,
  "environmental protection is somewhat more important" = 2,
  "environmental protection is much more important" = 1,
  "dont know" = NA,
  "no answer" = NA
)
data$development.over.environ <- unname(c4.scores[as.character(data$c4)])

# D31a. "To protect the domestic economy, China should restrict imports of
# foreign goods."
data$restrict.imports <- unname(d31.scores[as.character(data$d31a)])

# D31b. "Chinese television should give priority to Chinese films and
# programmes."
data$tv.protectionism <- unname(d31.scores[as.character(data$d31b)])

# D31c. "Importing too many foreign films, music and books is eroding our own
# culture."
data$cultural.protectionism <- unname(d31.scores[as.character(data$d31c)])

# D31d. "If the country has no dignity, even ordinary people will be looked
# down upon."
data$national.dignity <- unname(d31.scores[as.character(data$d31d)])

# D31e. "Any sacrifice an individual makes for the national interest is
# warranted."
data$sacrifice.individual <- unname(d31.scores[as.character(data$d31e)])

# D31f. "People who do not identify with Chinese customs and traditions
# cannot become fully Chinese."
data$tradition.key <- unname(d31.scores[as.character(data$d31f)])

# D31g. "China should adopt international human-rights standards in handling
# its human-rights issues."
data$respect.intl.human.rights <- unname(d31.scores[as.character(data$d31g)])

# E14a-E14h and G3c-G3h use a five-point agree/disagree scale whose
# nonresponse labels are "dont know" / "refuse to answer". Scores run from
# 5 = agree strongly to 1 = disagree strongly; nonresponse and any unlisted
# label become NA.

e14.scores <- c(
  "agree strongly" = 5,
  "agree" = 4,
  "neither agree nor disagree" = 3,
  "disagree" = 2,
  "disagree strongly" = 1,
  "dont know" = NA,
  "refuse to answer" = NA
)

# E14a. "Demonstrations easily turn into social unrest and affect social
# stability."
data$protest.chaos <- unname(e14.scores[as.character(data$e14a)])

# E14b. "A one-party system is currently the most stable political system for
# China."
data$single.party.stable <- unname(e14.scores[as.character(data$e14b)])

# E14c. "Demonstrations should be banned."
data$restrict.protests <- unname(e14.scores[as.character(data$e14c)])

# E14d. "To solve today's complex economic problems, we need a strong
# government."
data$need.strong.gov <- unname(e14.scores[as.character(data$e14d)])

# E14e. "If people's thinking is not unified, society will fall into chaos."
data$ideological.diversity.chaos <- unname(e14.scores[as.character(data$e14e)])

# E14f. "Market competition is harmful to social stability."
data$capitalist.chaos <- unname(e14.scores[as.character(data$e14f)])

# E14g. "A multi-party system will lead a country into political chaos."
data$multiparty.chaos <- unname(e14.scores[as.character(data$e14g)])

# E14h. "If the economy develops too fast, it will endanger social
# stability."
data$bubble.chaos <- unname(e14.scores[as.character(data$e14h)])

# G3c. "Wealthy people can get their children a better education than other
# people can."
data$education.inequality.ok <- unname(e14.scores[as.character(data$g3c)])

# G3d. "Rural migrants are not allowed to enjoy urban welfare benefits."
data$exclude.migrants.ok <- unname(e14.scores[as.character(data$g3d)])

# G3f. "When jobs are scarce, men should go out to work and women should stay
# home to look after the family."
data$traditional.gender.roles <- unname(e14.scores[as.character(data$g3f)])

# G3g. "In every line of work, women and men should have the same
# opportunities for employment and promotion."
data$gender.equality <- unname(e14.scores[as.character(data$g3g)])

# G3h. "To ensure equality, the government gives people at the bottom of
# society some extra help."
data$gov.redistribution <- unname(e14.scores[as.character(data$g3h)])

# K35. "Do you believe in the existence of God/gods?"
# Coded 3 = yes and certain, 2 = yes with some doubts, 1 = no; nonresponse
# and any unlisted label become NA.

k35.scores <- c(
  "yes, and I feel certain about it" = 3,
  "yes, but I have some doubts" = 2,
  "no" = 1,
  "don't know" = NA,
  "no answer" = NA
)
data$believe.god <- unname(k35.scores[as.character(data$k35)])

# K36. "Do you believe that a soul exists after a person dies?"
# Coded 3 = yes, 2 = somewhat, 1 = no; nonresponse and any unlisted label
# become NA.

k36.scores <- c(
  "yes" = 3,
  "somewhat" = 2,
  "no" = 1,
  "dont know" = NA,
  "no answer" = NA
)
data$believe.afterlife <- unname(k36.scores[as.character(data$k36)])

################################################################################################
############################### Feelings/Trust Recoding  #######################################
################################################################################################

# B9. "Do you trust the following kinds of people very much, somewhat, not
# very much, or not at all?"
# Every B9 item is scored 4 = very much ... 1 = not at all by looking the
# response label up in b9.scores. "not applicable", "don't know",
# "no answer", and any unlisted label become NA.
# The trailing space in "don't trust them at all " is part of the label being
# matched and must be kept.

b9.scores <- c(
  "very much" = 4,
  "somewhat trust them" = 3,
  "don't trust them very much" = 2,
  "don't trust them at all " = 1,
  "not applicable" = NA,
  "don't know" = NA,
  "no answer" = NA
)

# B9a. Urban residents
data$trust.urbanites <- unname(b9.scores[as.character(data$b9a)])

# B9b. Businesspeople
data$trust.businessmen <- unname(b9.scores[as.character(data$b9b)])

# B9c. People from other places
data$trust.outsiders <- unname(b9.scores[as.character(data$b9c)])

# B9d. Farmers
data$trust.farmers <- unname(b9.scores[as.character(data$b9d)])

# B9e. Strangers
data$trust.strangers <- unname(b9.scores[as.character(data$b9e)])

# B9f. Foreigners
data$trust.foreigners <- unname(b9.scores[as.character(data$b9f)])

# B9g. County/city leaders
data$trust.local.leaders <- unname(b9.scores[as.character(data$b9g)])

# B9h. NOTE(review): the comment here previously repeated B9g's
# "county/city leaders"; the target variable name indicates provincial
# leaders -- confirm against the questionnaire.
data$trust.provincial.leaders <- unname(b9.scores[as.character(data$b9h)])

# B9i. Central leaders. N.B. Mislabelled on questionnaire as B9h still.
data$trust.national.leaders <- unname(b9.scores[as.character(data$b9i)])

# B10. "Do you accept or not accept the following kinds of people?"
# Each item becomes a 0/1 indicator: 1 = "yes", 0 = "no"; "don't know",
# "no answer", and any unlisted label become NA.

b10.scores <- c(
  "yes" = 1,
  "no" = 0,
  "don't know" = NA,
  "no answer" = NA
)

# B10a. Corrupt officials
data$accept.corrupt.officials <- unname(b10.scores[as.character(data$b10a)])

# B10b. People who criticize the Chinese Communist Party
data$accept.ccp.critics <- unname(b10.scores[as.character(data$b10b)])

# B10c. People who engage in prostitution
data$accept.prostitutes <- unname(b10.scores[as.character(data$b10c)])

# B10d. Homosexuals
data$accept.gay.people <- unname(b10.scores[as.character(data$b10d)])

# B10e. Drug users
data$accept.drug.users <- unname(b10.scores[as.character(data$b10e)])

################################################################################################
############################### Demographic Recoding  ##########################################
################################################################################################

# age = age
# income = family income (total annual household income)


# female: 1 for "female", 0 for "male".
data$female[data$gender == "female"] <- 1
data$female[data$gender == "male"] <- 0
# This matches the literal label "NA"; it is not a test for missing values.
data$female[data$gender == "NA"] <- NA

# Education Variable required some construction. Resolved encoding errors with reference to Stata. For some reason the vocational-high-school and junior-college lines could not be assigned to a number, so assigned in two parts.
# Step 1 (edulevel.pre): match the mis-encoded education labels as they appear
# in the data. Two categories get the letter placeholders "A"/"B", which makes
# the whole column character; step 2 below therefore compares against strings.

data$edulevel.pre[data$edu == "\xce\xc4ä"] <- 1  # illiterate
data$edulevel.pre[data$edu == "Сѧ"] <- 2  # primary school
data$edulevel.pre[data$edu == "\xb3\xf5\xd6\xd0"] <- 3  # junior middle school
data$edulevel.pre[data$edu == "\xb8\xdf\xd6\xd0"] <- 4  # senior high school
data$edulevel.pre[data$edu == levels(data$edu)[5]] <- "B" # vocational high school (matched by level position, not by label)
data$edulevel.pre[data$edu == "\xb4\xf3ר"] <- "A" # junior college
# NOTE(review): this label is written with literal "<b4><f3>" text, whereas the
# other labels use "\x" byte escapes -- confirm that it actually matches rows
# (e.g. table(data$edulevel.pre)); otherwise no respondent is coded 6.
data$edulevel.pre[data$edu == "<b4><f3>ѧ"] <- 6  # university
data$edulevel.pre[data$edu %in% c("\u02f6ʿ", "\xb2\xa9ʿ")] <- 7  # master's or doctorate
data$edulevel.pre[data$edu == "NA"] <- NA

# Step 2 (edulevel): collapse the placeholders so that "A" and "B" both map to
# level 5, giving a 1-7 scale.
# NOTE(review): when the first line below runs, `edulevel` does not exist yet
# but `edulevel.pre` does, so `data$edulevel` may partially match it via `$`
# and the new column may start as a character copy of edulevel.pre. Check
# class(data$edulevel) before using it as a numeric variable.
data$edulevel[data$edulevel.pre == "1"] <- 1
data$edulevel[data$edulevel.pre == "2"] <- 2
data$edulevel[data$edulevel.pre == "3"] <- 3
data$edulevel[data$edulevel.pre == "4"] <- 4
data$edulevel[data$edulevel.pre %in% c("A", "B")] <- 5
data$edulevel[data$edulevel.pre == "6"] <- 6
data$edulevel[data$edulevel.pre == "7"] <- 7
data$edulevel[data$edulevel.pre == "NA"] <- NA


# rural.hukou: 1 for "rural", 0 for "urban" on item a9.
data$rural.hukou[data$a9 == "rural"] <- 1
data$rural.hukou[data$a9 == "urban"] <- 0
data$rural.hukou[data$a9 == "no answer"] <- NA

# ccp: 1 when e6_a is "yes"; "no", "once was" and "no participation" are all
# coded 0.
data$ccp[data$e6_a == "yes"] <- 1
data$ccp[data$e6_a %in% c("no", "once was", "no participation")] <- 0
data$ccp[data$e6_a %in% c("dont know", "no answer")] <- NA

# Asked what was top reason for joining the CCP 
# The three why.ccp.* variables are copies of the source items with the listed
# nonresponse labels set to NA.
# NOTE(review): "rufuse to answer" is spelled as in the code; confirm it
# matches the label in the data, otherwise those responses are not blanked.

data$why.ccp.1st <- data$e6bc_s1
data$why.ccp.1st[data$e6bc_s1 %in% c("dont know", "rufuse to answer")] <- NA

# Asked what was second reason for joining the CCP

data$why.ccp.2nd <- data$e6bc_s2
data$why.ccp.2nd[data$e6bc_s2 %in% c("dont know", "rufuse to answer")] <- NA

# Asked what was third reason for joining the CCP

data$why.ccp.3rd <- data$e6bc_s3
data$why.ccp.3rd[data$e6bc_s3 %in% c("dont know", "rufuse to answer")] <- NA

################################################################################################
############################## Appendix Figure A.1 - Barplot ##################################
################################################################################################



# Appendix Figure A.1: bar chart of left-right self-placement (0-10).
# ggplot2 is attached here because this is the first use of ggplot() in the
# script and nothing earlier attaches it.
library(ggplot2)

# Rows with a missing own.ideology fall outside the x limits and are dropped
# from the bars.
# NOTE(review): the nonresponse rate in the annotation is a hard-coded string;
# confirm it against mean(is.na(data$own.ideology)) if the data change.
barplot <- ggplot(data, aes(x = own.ideology)) +
  geom_bar(fill = "slategray2") +
  scale_x_continuous(breaks = c(0:10), limits = c(-0.5, 10.5)) +
  xlab("Self-Placement on the Left-Right Scale") +
  ylab("Number of Respondents") +
  theme_bw() +
  annotate("text", label = "Nonresponse Rate:\n 67.7%", x = 8.9, y = 700, size = 6) +
  theme(text = element_text(size = 16))

# Write the figure to PDF. The plot is printed explicitly: a bare object name
# is only auto-printed at the interactive top level, so under source() the
# PDF would otherwise come out empty.
pdf("2008_barplot.pdf", width = 9, height = 6)
print(barplot)
dev.off()


################################################################################################
########## Appendix Figure A.2:  Correlation Analysis of Left-Right Placement  ##################
################################################################################################

## Create list of questions to select

# The predictors are selected by name rather than by column position, so the
# selection cannot drift if columns are added to or removed from `data`.
# NOTE(review): these are the recoded variables that the former positional
# selection c(599:603, 605:606, 608:611, 613:628) resolves to when
# own.ideology is column 598; they line up one-to-one with questionLabels
# below. Confirm against names(data).

predictorsList <- c(
  "prefer.chinese.citizenship", "china.shameful", "chinese.paragon",
  "china.best", "support.wrong.gov", "gov.noninterference",
  "overweening.state", "development.over.environ", "restrict.imports",
  "tv.protectionism", "cultural.protectionism", "sacrifice.individual",
  "tradition.key", "respect.intl.human.rights", "protest.chaos",
  "single.party.stable", "restrict.protests", "need.strong.gov",
  "ideological.diversity.chaos", "capitalist.chaos", "multiparty.chaos",
  "bubble.chaos", "education.inequality.ok", "exclude.migrants.ok",
  "traditional.gender.roles", "gender.equality", "gov.redistribution"
)
stopifnot(all(predictorsList %in% names(data)))

## Create list of correlations

# One Pearson test per predictor against own.ideology (default 95% interval);
# keep the estimate, interval ends, test statistic and degrees of freedom, and
# stack the one-row results in predictor order.
correlations <- do.call(rbind, lapply(predictorsList, function(x) {
  tidy(cor.test(data$own.ideology, data[[x]], method = "pearson"))[
    c("estimate", "conf.low", "conf.high", "statistic", "parameter")
  ]
}))

## Create names for our questions and categorize by issue

# Display labels, in the same order as predictorsList.
questionLabels <- c(
  "I prefer to be a Chinese citizen",
  "China has some issues that make me feel shame",
  "Foreigners should be more like Chinese people",
  "China is better than most other countries",
  "Should support government even when it is wrong",
  "Government should not interfere in private lives",
  "The state has too much power",
  "Development takes precedence over the environment",
  "Should restrict imports to protect the economy",
  "Chinese TV/film should get preferential treatment",
  "Importing foreign media erodes Chinese culture",
  "Individuals should sacrifice for state interests",
  "Cannot be Chinese without respect for tradition",
  "China should respect international human rights",
  "Protests produce turmoil and instability",
  "Single party regime is the most stable for China",
  "Should restrict protest activity",
  "Strong state key to resolving economic issues",
  "Diversity of thought leads to a chaotic society",
  "Market competition harms social stability",
  "Multiparty systems lead to political disorder",
  "If the economy overheats, it endangers stability",
  "Fair if children of elite get better educations",
  "Fair if migrants excluded from social welfare",
  "Fair if men have more employment opportunities",
  "Fair for men and women to have same opportunities",
  "Fair if government provides extra support to poor"
)

# Facet label for each question, in the same order as predictorsList.
issueType <- c(
  "Political\nIssues", "Political\nIssues", "Political\nIssues",
  "Political\nIssues", "Political\nIssues", "Political\nIssues",
  "Political\nIssues", "Economic\nIssues", "Economic\nIssues",
  "Social\nIssues", "Social\nIssues", "Social\nIssues",
  "Social\nIssues", "Political\nIssues", "Political\nIssues",
  "Political\nIssues", "Political\nIssues", "Economic\nIssues",
  "Political\nIssues", "Economic\nIssues", "Political\nIssues",
  "Economic\nIssues", "Social\nIssues", "Social\nIssues",
  "Social\nIssues", "Social\nIssues", "Economic\nIssues"
)

# cbind() would silently recycle a short vector, so check the lengths first.
stopifnot(
  length(questionLabels) == length(predictorsList),
  length(issueType) == length(predictorsList)
)

# Create data frame of correlations and then order them

bivariateCorrs <- cbind(predictorsList, correlations, questionLabels, issueType)

orderedCorrs <- bivariateCorrs[order(bivariateCorrs$estimate), ]

# rename columns of names to orderedPredictors

names(orderedCorrs)[names(orderedCorrs) == 'predictorsList'] <- 'orderedPredictors'

# make the questionLabels an ordered factor for ggplot
# (level order follows the estimates, which fixes the row order in the plot)

orderedCorrs$questionLabels <- factor(
  orderedCorrs$questionLabels,
  levels = orderedCorrs$questionLabels[order(orderedCorrs$estimate)]
)

## This function creates the correlation plot of the questions

credplot.gg <- function(d){
  # Dot-and-whisker plot of correlation estimates, one row per question,
  # faceted by issue type.
  #
  # d is a data frame with one row per question and these columns:
  #   questionLabels      y-axis label
  #   estimate            point estimate (the dot)
  #   conf.low, conf.high ends of the horizontal interval bar
  #   issueType           facet the question is drawn in
  # Returns the ggplot object; print it to draw the figure.
  #
  # NOTE(review): a two-argument credplot.gg defined further down replaces
  # this definition before any call is made in this script.
  #
  # library() rather than require(): a missing ggplot2 then stops here with a
  # clear error instead of a warning followed by "could not find function".
  library(ggplot2)
  ggplot(d, aes(y = questionLabels, x = estimate, xmin = conf.low, xmax = conf.high)) +
    geom_point(size = 2.5) +
    geom_errorbarh(height = 0) +
    # dashed reference line at zero correlation
    geom_vline(xintercept = 0, linetype = 2) +
    scale_x_continuous(
      breaks = c(-0.15, -0.1, -0.05, 0, 0.05, 0.1),
      limits = c(-0.157, 0.131)
    ) +
    xlab('Correlation with Left-Right Self-Identification') +
    ylab('') +
    facet_grid(issueType ~ ., scales = "free", space = "free") +
    theme_bw() +
    theme(strip.text.y = element_text(angle = 0), text = element_text(size = 12))
}


#### Equivalence Analysis to add 90% Confidence intervals and to compare them to a negligible effect size (correlation = 0.13) ####
#### See Rainey, Carlisle (2014). Arguing for a Negligible Effect. American Journal of Political Science 58(4), 1083–1091.
#### See also Lakens, Daniël (2017). Equivalence Tests: A Practical Primer for t Tests, Correlations, and Meta-Analyses. Social Psychological and Personality Science 8(4), 355–362.

## Create list of correlations again, this time with 0.9 conf.level

# Same Pearson tests as for the 95% table, differing only in conf.level.
correlations90 <- do.call(rbind, lapply(predictorsList, function(x) {
  tidy(cor.test(data$own.ideology, data[[x]], method = "pearson", conf.level = 0.9))[
    c("estimate", "conf.low", "conf.high", "statistic", "parameter")
  ]
}))

## Question labels and issue categories

# questionLabels and issueType are the vectors already defined for the 95%
# table above; they are reused rather than typed out a second time, so the
# two tables cannot drift apart. cbind() would silently recycle a vector of
# the wrong length, so check before combining.
stopifnot(
  length(questionLabels) == length(predictorsList),
  length(issueType) == length(predictorsList)
)

# Create data frame of correlations and then order them

bivariateCorrs90 <- cbind(predictorsList, correlations90, questionLabels, issueType)

orderedCorrs90 <- bivariateCorrs90[order(bivariateCorrs90$estimate), ]

# rename columns of names to orderedPredictors

names(orderedCorrs90)[names(orderedCorrs90) == 'predictorsList'] <- 'orderedPredictors'

# make the questionLabels an ordered factor for ggplot

orderedCorrs90$questionLabels <- factor(
  orderedCorrs90$questionLabels,
  levels = orderedCorrs90$questionLabels[order(orderedCorrs90$estimate)]
)

## This function creates the correlation plot of the questions, this time with negligible effect sizes. The duplication of the function is to include both sets of confidence intervals.

credplot.gg <- function(d1, d2){
  # Dot-and-whisker plot with two sets of intervals per question, faceted by
  # issue type, with reference lines for a negligible effect size.
  #
  # d1 and d2 are data frames with one row per question and the columns
  # questionLabels, estimate, conf.low, conf.high and issueType.
  #   d1 supplies the dots and the thick interval bars (size = 1).
  #   d2 supplies a second, thin set of interval bars drawn over them.
  # The script calls this with the 90% table as d1 and the 95% table as d2.
  # Returns the ggplot object; print it to draw the figure.
  #
  # library() rather than require(): a missing ggplot2 then stops here with a
  # clear error instead of a warning followed by "could not find function".
  library(ggplot2)
  ggplot(d1, aes(y = questionLabels, x = estimate, xmin = conf.low, xmax = conf.high)) +
    geom_point(size = 2.5) +
    geom_errorbarh(height = 0, size = 1) +
    geom_errorbarh(height = 0, data = d2, aes(xmin = conf.low, xmax = conf.high)) +
    # long-dashed reference line at zero correlation
    geom_vline(xintercept = 0, linetype = 5) +
    ## Negligible effect size: dotted lines at +/- 0.13
    geom_vline(xintercept = -0.13, linetype = 3) +
    geom_vline(xintercept = 0.13, linetype = 3) +
    scale_x_continuous(
      breaks = c(-0.15, -0.1, -0.05, 0, 0.05, 0.1),
      limits = c(-0.16, 0.16)
    ) +
    xlab('Correlation with Left-Right Self-Identification') +
    ylab('') +
    facet_grid(issueType ~ ., scales = "free", space = "free") +
    theme_bw() +
    theme(strip.text.y = element_text(angle = 0), text = element_text(size = 12))
}


### Makes figure A.2 in Appendix
# The plot is printed explicitly: the value of a bare call is only
# auto-printed at the interactive top level, so under source() the PDF would
# otherwise come out empty.
pdf("2008_L-R_Correlations90.pdf", width = 8.5, height = 8)
print(credplot.gg(orderedCorrs90, orderedCorrs))
dev.off()




#####################################################################################################################
#################### Appendix Figure A.3: Correlation Analysis of Partisan and Symbolic Issues  #####################
#####################################################################################################################


## Create list of partisan and symbolic variables

# The variables are selected by name rather than by column position, so the
# selection cannot drift if columns are added to or removed from `data`.
# NOTE(review): these are the recoded variables that the former positional
# selection c(649, 640:644) resolves to when own.ideology is column 598; they
# line up one-to-one with symLabels below. Confirm against names(data).

symbolicList.08 <- c(
  "ccp", "accept.corrupt.officials", "accept.ccp.critics",
  "accept.prostitutes", "accept.gay.people", "accept.drug.users"
)
stopifnot(all(symbolicList.08 %in% names(data)))

# One Pearson test per variable against own.ideology (default 95% interval),
# stacked in the order of symbolicList.08.
correlations.sym08 <- do.call(rbind, lapply(symbolicList.08, function(x) {
  tidy(cor.test(data$own.ideology, data[[x]], method = "pearson"))[
    c("estimate", "conf.low", "conf.high", "statistic", "parameter")
  ]
}))

# Create vector of longer question labels

symLabels <- c(
  "Communist Party Membership", "Accept corrupt officials",
  "Accept CCP critics", "Accept Prostitutes", "Accept Gay People",
  "Accept drug users"
)
stopifnot(length(symLabels) == length(symbolicList.08))

# Create vector of issue types
# (every row carries the same facet label; this overwrites the issueType
# vector used for Figure A.2)

issueType <- rep("2008\nSurvey", length(symLabels))

bivariateCorrs.sym08 <- cbind(symbolicList.08, correlations.sym08, symLabels, issueType)

# ordered by estimate

orderedCorrs.sym08 <- bivariateCorrs.sym08[order(bivariateCorrs.sym08$estimate), ]

# orderedCorrs.sym08 <- bivariateCorrs.sym08

# rename columns of names to orderedPredictors

names(orderedCorrs.sym08)[names(orderedCorrs.sym08) == 'symbolicList.08'] <- 'orderedPredictors'

# make the symLabels an ordered factor for ggplot

orderedCorrs.sym08$symLabels <- factor(
  orderedCorrs.sym08$symLabels,
  levels = orderedCorrs.sym08$symLabels[order(orderedCorrs.sym08$estimate)]
)

# orderedCorrs.sym08$symLabels <- factor(orderedCorrs.sym08$symLabels)

# Save our file so we can combine later
save(orderedCorrs.sym08, file = "symCorrs08.Rda")



credplot.gg <- function(d){
  # Dot-and-interval plot of the symbolic-issue correlations with left-right
  # self-identification, faceted by issue type.
  #
  # Args:
  #   d: data frame. Columns used: symLabels (y axis), estimate (x axis),
  #      conf.low and conf.high (interval ends), issueType (facet rows).
  #
  # Returns:
  #   The ggplot object. Nothing is drawn until the object is printed.
  #
  # NOTE(review): within this section the function does not appear to be
  # called before credplot.gg is defined again with two arguments further
  # down; confirm whether it is still needed.
  #
  # library() instead of require(): if ggplot2 is missing this stops right
  # here with a clear error, rather than returning FALSE and failing later
  # with "could not find function".
  library(ggplot2)
  ggplot(d, aes(y = symLabels, x = estimate, xmin = conf.low, xmax = conf.high)) +
    geom_point() +
    geom_errorbarh(height = 0) +
    # Reference line at zero correlation
    geom_vline(xintercept = 0, linetype = 2) +
    # Breaks are spelled out literally so the tick labels stay exact
    # NOTE(review): scale limits censor values outside +/-0.59, so an interval
    # reaching beyond them would be dropped with a warning -- confirm none do.
    scale_x_continuous(breaks = c(-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5), limits = c(-0.59, 0.59)) +
    xlab('Correlation with Left-Right Self-Identification') +
    ylab('') +
    # One facet row per issue type; free scales/space so each panel only
    # holds (and is sized for) its own items
    facet_grid(issueType ~ ., scales = "free", space = "free") +
    theme_bw() +
    theme(strip.text.y = element_text(angle = 0), text = element_text(size = 12))
}


### Equivalence Analysis of Symbolic Issues

# Same Pearson correlations as for the default-level table, but with 90%
# confidence intervals (conf.level = 0.9). Each item contributes one row:
# estimate, confidence bounds, test statistic and degrees of freedom.

correlations.sym0890 <- Reduce(
  rbind,
  lapply(symbolicList.08, function(item) {
    fit <- cor.test(data$own.ideology, data[[item]], method = "pearson", conf.level = 0.9)
    tidy(fit)[c("estimate", "conf.low", "conf.high", "statistic", "parameter")]
  })
)

# Longer question labels, listed in the same order as symbolicList.08

symLabels <- c(
  "Communist Party Membership",
  "Accept corrupt officials",
  "Accept CCP critics",
  "Accept Prostitutes",
  "Accept Gay People",
  "Accept drug users"
)

# Issue type (used as the facet label); identical for every item

issueType <- rep("2008\nSurvey", length(symLabels))

bivariateCorrs.sym0890 <- cbind(symbolicList.08, correlations.sym0890, symLabels, issueType)

# Sort the rows by ascending estimate

orderedCorrs.sym0890 <- bivariateCorrs.sym0890[order(bivariateCorrs.sym0890[["estimate"]]), ]

# The column holding the variable names is called orderedPredictors from here on

colnames(orderedCorrs.sym0890)[colnames(orderedCorrs.sym0890) == "symbolicList.08"] <- "orderedPredictors"

# Factor levels follow ascending estimate so that ggplot keeps the sorted row order

orderedCorrs.sym0890$symLabels <- with(
  orderedCorrs.sym0890,
  factor(symLabels, levels = symLabels[order(estimate)])
)

# Save our file so we can combine later
save(orderedCorrs.sym0890, file = "symCorrs0890.Rda")



credplot.gg <- function(d1, d2){
  # Dot-and-interval plot of the symbolic-issue correlations with left-right
  # self-identification, showing two sets of intervals and the bounds of a
  # negligible effect size, faceted by issue type.
  #
  # Args:
  #   d1: data frame supplying the points and the thick intervals. Columns
  #       used: symLabels (y axis), estimate (x axis), conf.low and
  #       conf.high (interval ends), issueType (facet rows).
  #   d2: data frame supplying the thin intervals; expected to carry the same
  #       column names, since y and x are inherited from the main mapping.
  #
  # Returns:
  #   The ggplot object. Nothing is drawn until the object is printed.
  #
  # library() instead of require(): if ggplot2 is missing this stops right
  # here with a clear error, rather than returning FALSE and failing later
  # with "could not find function".
  library(ggplot2)
  ggplot(d1, aes(y = symLabels, x = estimate, xmin = conf.low, xmax = conf.high)) +
    geom_point(size = 2.5) +
    # Thick intervals from d1
    geom_errorbarh(height = 0, size = 1) +
    # Thin intervals from d2, drawn on the same rows
    geom_errorbarh(height = 0, data = d2, aes(xmin = conf.low, xmax = conf.high)) +
    # Reference line at zero correlation
    geom_vline(xintercept = 0, linetype = 5) +
    # Bounds of the negligible effect size
    geom_vline(xintercept = -0.13, linetype = 3) +
    geom_vline(xintercept = 0.13, linetype = 3) +
    # Breaks are spelled out literally so the tick labels stay exact
    # NOTE(review): scale limits censor values outside +/-0.59, so an interval
    # reaching beyond them would be dropped with a warning -- confirm none do.
    scale_x_continuous(breaks = c(-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5), limits = c(-0.59, 0.59)) +
    xlab('Correlation with Left-Right Self-Identification') +
    ylab('') +
    # One facet row per issue type; free scales/space so each panel only
    # holds (and is sized for) its own items
    facet_grid(issueType ~ ., scales = "free", space = "free") +
    theme_bw() +
    theme(strip.text.y = element_text(angle = 0), text = element_text(size = 12))
}

## Makes Figure A.3 in the Appendix
# First argument: the 90% intervals (drawn thick); second: the intervals at
# cor.test's default level (drawn thin).
# print() is explicit: a ggplot object is only drawn when printed, and
# top-level auto-printing is skipped when this script is run via source()
# with its defaults, which would leave the PDF without a plot.

pdf("2008_L-R_correlations_sym90.pdf", width = 8.5, height = 6)
print(credplot.gg(orderedCorrs.sym0890, orderedCorrs.sym08))
dev.off()


